package com.shujia.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the RDD `sample` transformation: reads a text file and prints a
  * random sample of its lines.
  */
object Demo5Sample {
  /**
    * Entry point: builds a local SparkContext, samples the lines of
    * `data/students.txt`, prints the sampled lines, and stops the context.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Set up the Spark environment with a local master.
    // NOTE(review): the app name "flatMap" looks copied from another demo;
    // left unchanged here because it is a runtime string — confirm and rename.
    val conf: SparkConf = new SparkConf()
      .setMaster("local")
      .setAppName("flatMap")

    val sc = new SparkContext(conf)

    try {
      // Read the input file as an RDD of text lines.
      val students: RDD[String] = sc.textFile("data/students.txt")

      // sample: draw a random sample of the RDD.
      // With replacement, `fraction` is the expected number of times each
      // element is chosen, so the result size is approximate and an element
      // may appear more than once. No seed is passed, so the output is
      // expected to differ between runs.
      val sampleRDD: RDD[String] =
        students.sample(withReplacement = true, fraction = 0.1)

      // foreach is an action: it triggers the job and prints each sampled line.
      sampleRDD.foreach(println)
    } finally {
      // Always release the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}
